Here I am using MuSiC for deconvolution of data. Unlike Cibersort, this method does not require selection of marker genes (https://doi.org/10.1038/s41467-018-08023-x). I am using samples from Fitzgerald lab (BE, NE, NG and EAC) and from the oesophageal TCGA project (TCGA-ESCA) that has a mixture of adonomarcinomas and squamous cancers.
# Environment setup
library(scran)
library(MuSiC)
library(pheatmap)
library(Matrix)
library(xbioc)
library(RColorBrewer)
library(reshape2)
library(Rtsne)
source("../Analysis/Functions/auxiliary.R")
# input data
files.dir <- "~/Dropbox/Postdoc/2019-12-29_BE2020/RNA-seq/"
# output data location
files.out <- "~/Dropbox/Postdoc/2019-12-29_BE2020/Figures/"
# image annotation
anno_col <- list(Project.ID = c(Gastric = "#4DBBD5FF", Squamous = "darkred", `Barrett's` = "#00A087FF",
`ICGC-ESAD` = "#FF0DFFFF", `TCGA-ESCA` = "#15FF0DFF"), Tissue = c(NG = "#4DBBD5FF",
NE = "darkred", BE = "#00A087FF", ND = "#3C5488FF", SMG = "#B09C85FF", ALL = "grey80"))
# read all data
sce <- readRDS("~/Dropbox/Postdoc/2019-12-29_BE2020/All_corrected_sce_filtered.rds")
# Exclude contaminating cells
sce <- sce[, sce$include]
# keep only cells from pure tissue types
cur.sce <- sce[, colData(sce)$Tissue %in% c("NG", "NE", "BE", "SMG")]
# replace '_' with '-' in names of cell types
cur.sce$cell_type <- gsub("_", "-", cur.sce$cell_type)
# replace '_' with '-' in names of cell types
cur.sce$cell_type_secondary <- gsub("_", "-", cur.sce$cell_type_secondary)
# keep only samples with 300 cells per tissue
cur.sce$Patient_tissue <- paste(cur.sce$Patient, cur.sce$Tissue, sep = "_")
cell.counts <- table(paste(cur.sce$Patient, cur.sce$Tissue, sep = "_"))
cur.sce <- cur.sce[, cur.sce$Patient_tissue %in% names(cell.counts[cell.counts >
300])]
# I will also remove Myoepithelial cells (very small number of cells from a
# single patient)
cur.sce <- cur.sce[, !(cur.sce$cell_type %in% c("Myo-epithelial"))]
# Identify overlapping cells and assign the same type of immune and stromal cells
# to the same clusters of cells across tissues Batch correction
sce.list <- split.sce(cur.sce, unique(cur.sce$Sample), colData.name = "Sample")
# Order sce objects for batch correction
sce.list <- sce.list[order(unlist(lapply(sce.list, ncol)), decreasing = TRUE)]
n <- names(sce.list)
sce.list <- sce.list[c(which(grepl("NE_out", n)), which(grepl("NG_out", n)), which(grepl("BE_out",
n)), which(grepl("SMG_out", n)))]
corrected <- batch.correction(sce.list)
cur.sce <- do.call("cbind", sce.list)
# Compute new tSNE
set.seed(1234)
tsne <- Rtsne(t(corrected), pca = FALSE, perplexity = 250)
reducedDims(cur.sce)$TSNE <- tsne$Y
# Clustering on corrected data
g <- buildSNNGraph(corrected, k = 10)
clusters <- igraph::cluster_louvain(g)$membership
# Check what clusters overlap with nonepithelial cell types
tmp <- reshape2::dcast(plyr::count(cbind(clusters, cur.sce$tissue_type)), formula = x.clusters ~
x.V2, fill = 0)
tmp <- tmp$x.clusters[tmp$NonEpithelial == apply(as.matrix(tmp[, 2:5]), 1, max)]
# Identify the type epithelial cells
tmp2 <- reshape2::dcast(plyr::count(cbind(clusters[clusters %in% tmp], cur.sce$cell_type_secondary[clusters %in%
tmp])), formula = x.1 ~ x.2, fill = 0)
tmp3 <- colnames(tmp2)[apply(tmp2, 1, which.max)]
names(tmp3) <- tmp2$x.1
# replace the names of the non-epithelial cells if they don't match the type
# across tissues
for (i in names(tmp3)) {
cur.sce$cell_type_secondary[clusters == i] <- tmp3[i]
}
# replace tissue names for a generic names for immune and stromal cells
cur.sce$Tissue[cur.sce$cell_type %in% c("Immune", "Stromal")] <- "ALL"
# Visualize tsne
metadata(cur.sce)$colour_vector["ALL"] <- "black"
p.all.cells <- ggplot(data.frame(tsne1 = reducedDims(cur.sce)$TSNE[, 1], tsne2 = reducedDims(cur.sce)$TSNE[,
2], tissue = colData(cur.sce)$Tissue)) + geom_point(aes(tsne1, tsne2), colour = "black",
size = 1) + geom_point(aes(tsne1, tsne2, colour = tissue), size = 0.5) + scale_color_manual(values = metadata(cur.sce)$colour_vector) +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(), axis.line = element_line(colour = "grey"))
p.all.cells
# Visualize tsne cell type
p.all.cells <- ggplot(data.frame(tsne1 = reducedDims(cur.sce)$TSNE[, 1], tsne2 = reducedDims(cur.sce)$TSNE[,
2], cell.type = colData(cur.sce)$cell_type_secondary)) + geom_point(aes(tsne1,
tsne2), colour = "black", size = 1) + geom_point(aes(tsne1, tsne2, colour = cell.type),
size = 0.5) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(), axis.line = element_line(colour = "grey"))
p.all.cells
# tracking names
cur.sce$cell.types <- (paste0(cur.sce$Tissue, "_", cur.sce$cell_type_secondary))
cur.sce$cell.groups <- (paste0(cur.sce$Tissue, "_", cur.sce$cell_type_secondary,
"_", cur.sce$Patient))
# create phenotype matrix
pheno.matrix <- colData(cur.sce)[, c(3, 4, 16, 17, 18, 19, 23, 24)]
# annotate phenotypes
metadata <- data.frame(labelDescription = c("Patient ID", "Tissue ID", "Tissue cluster",
"Cell Type", "Cell Type Detail", "Tissue Type", "Cell Type 2", "Cell Group"),
row.names = colnames(pheno.matrix))
# create Expression set for the input into MuSiC
cur.sce1 <- ExpressionSet(assayData = as.matrix(counts(cur.sce)), phenoData = new("AnnotatedDataFrame",
data = as.data.frame(pheno.matrix), varMetadata = metadata))
# EAC and normal data (linear scale). Contains only normal samples that were used
# in figure 4
bulk.data <- readRDS(paste0(files.dir, "/EAC-all/count_codingGenes_clean.rds"))
# replace gene ID into Ensembl ID
IDs <- rowData(sce)[rowData(sce)$Symbol %in% rownames(bulk.data), 1:2]
bulk.data <- bulk.data[IDs$Symbol, ]
rownames(bulk.data) <- IDs$ID
#get information about the samples from EAC
rowID_EAC<-readRDS(paste0(files.dir,"//EAC-all/samplesAnnotation_clean.rds"))
# get location of genes counts (to make it compatible with data used for MuSiC I
# will get the same list of samples)
location <- paste0(files.dir, "/TCGA/")
anno <- read.delim(paste0(location, "gdc_sample_sheet.2019-04-16_FPKM.tsv")) #It has FPKM in the name but the annotation of patients is the same for both raw and FPKM data
# keep only cancer data for ESCA project
anno.all <- anno[anno$Project.ID == "TCGA-ESCA" & anno$Sample.Type == "Primary Tumor",
]
# get location of genes counts (linear data)
anno <- read.delim(paste0(location, "gdc_sample_sheet.2019-04-16.tsv"))
anno.all <- anno[anno$Sample.ID %in% anno.all$Sample.ID, ]
# get data for cancers
data.all <- as.data.frame(matrix(ncol = nrow(anno.all), nrow = nrow(read.table(gzfile(paste0(location,
"/Data_counts/", anno.all[1, 1], "/", anno.all[1, 2])), row.names = 1))))
rownames(data.all) <- rownames(read.table(gzfile(paste0(location, "/Data_counts/",
anno.all[1, 1], "/", anno.all[1, 2])), row.names = 1))
colnames(data.all) <- anno.all$Sample.ID
# Read all data
for (i in colnames(data.all)) {
tmp <- read.table(gzfile(paste0(location, "/Data_counts/", anno.all[anno.all$Sample.ID ==
i, 1], "/", anno.all[anno.all$Sample.ID == i, 2])), row.names = 1)
data.all[, i] <- tmp[rownames(data.all), 1]
}
rownames(data.all) <- sapply(rownames(data.all), function(x) unlist(strsplit(x, split = "[.]"))[1])
# merge Cambridge and TCGA data
bulk.data <- merge(bulk.data, data.all, by = 0)
colnames(bulk.data)[1] <- "Gene"
rownames(bulk.data) <- bulk.data[, 1]
bulk.data <- bulk.data[, colnames(bulk.data) != "Gene"]
#keep only data for sample group and clinical info about Barrett's next to cancer
rowID<-rowID_EAC[,c(6,1)]
colnames(rowID)[1]<-"Project.ID"
#create new column for normal and cancer samples
# Project.ID has information about the proejct in line of TCGA project ID
rowID$Project.ID[rowID$Project.ID == "EAC"]<- "ICGC-ESAD"
rowID$Project.ID[rowID$Project.ID == "Barretts"]<- "Barrett's"
rowID$Sample.Type<-ifelse(rowID$Project.ID == "ICGC-ESAD", "Primary Tumor", "Solid Tissue Normal")
#Create primary diagnosis column
rowID$primary_diagnosis[rowID$Project.ID == "ICGC-ESAD"]<-"Adenocarcinoma"
rowID$primary_diagnosis[rowID$Project.ID != "ICGC-ESAD"]<-rowID$Project.ID[rowID$Project.ID != "ICGC-ESAD"]
rowID$site_of_resection_or_biopsy<-"ND"
rowID$Case.ID<-row.names(rowID)
rowID$Sample.ID<-rowID$Case.ID
#get information about TCGA patients
rowID.data<-read.table(paste0(location,"//clinical.tsv"), header = TRUE, sep = "\t", stringsAsFactors = FALSE)
#get information about TCGA samples
rowID.data2<-read.delim(paste0("/home/karolno/Dropbox/Postdoc/2019-04-15_Cancer-BE/TCGA/", "gdc_sample_sheet.2019-04-16.tsv"), stringsAsFactors = FALSE)
#merge
samples<-rowID.data2[rowID.data2$Sample.ID %in% colnames(data.all), 5:8 ]
patients<-rowID.data[rowID.data$submitter_id %in% rowID.data2$Case.ID[rowID.data2$Sample.ID %in% samples$Sample.ID], c(2,11, 24)]
#combine types of cancer for simplicity
patients[grep("quamous", patients$primary_diagnosis),2]<-"Squamous Carcinoma"
patients[grep("denocar", patients$primary_diagnosis),2]<-"Adenocarcinoma"
#merge all data into a single dataframe
rowID_all<-merge(samples, patients, all.x = TRUE, by.x = "Case.ID", by.y = "submitter_id")
rownames(rowID_all)<-rowID_all$Sample.ID
rowID_all$RP.BarrettsAdjacent<-"unknown"
# Combine Cambridge and TCGA data
rowID_combined<-rbind(rowID, rowID_all)
#remove normal samples with Duodenum data
rowID_combined<-rowID_combined[rowID_combined$Project.ID != "Duodenum", ]
rowID_print<-rowID_combined[,c(1,2,3,4)]
metadata2<-data.frame(labelDescription= c( "Study ID", "Barrett's Status", "Sample Type", "Diagnosis", "Site of resection", "Patient ID", "Sample ID"), row.names=colnames(rowID_combined))
#create expression set
data.all1<-ExpressionSet(assayData = as.matrix(bulk.data[,rownames(rowID_combined)]), phenoData = new("AnnotatedDataFrame", data = rowID_combined, varMetadata = metadata2) )
# read music scripts
cell.prop <- music_prop(bulk.eset = data.all1, sc.eset = cur.sce1, clusters = "cell.types",
samples = "Patient", select.ct = unique(cur.sce1$cell.types))
# saveRDS(cell.prop, file = paste0(files.dir,'/EAC-all/cell-prop.rds'))
plot.data <- data.matrix(cell.prop$Est.prop.weighted)
# all cells
order.names <- c("NE_Basal", "NE_Suprabasal", "NE_Suprabasal-Dividing", "NE_Intermediate",
"NE_Superficial", "NG_Undifferentiated", "NG_Undifferentiated-Dividing", "NG_Foveolar-Intermediate",
"NG_Foveolar-differentiated", "NG_Chief", "NG_Parietal", "NG_Endocrine-GHRL",
"NG_Endocrine-CHGA", "NG_Endocrine-NEUROD1", "BE_Columnar-Undifferentiated",
"BE_Columnar-Undifferentiated-Dividing", "BE_Endocrine-NEUROG3", "BE_Columnar-Intermediate",
"BE_Columnar-differentiated", "BE_Goblet", "SMG_Duct-Intercalating", "SMG_Oncocytes-MUC5B-Low",
"SMG_Mucous-MUC5B-High", "ALL_Immune-T-cells", "ALL_Immune-B-cells", "ALL_Immune-Macrophages",
"ALL_Stromal-CALD1-cells", "ALL_Stromal-ADH1B-cells", "ALL_Stromal-GNG11-cells")
plot.data <- plot.data[, order.names]
# get information about the cell types
colID <- sapply(colnames(plot.data), strsplit, "_")
colID <- data.frame(matrix(unlist(colID), nrow = length(colID), byrow = T))
rownames(colID) <- colnames(plot.data)
colnames(colID) <- c("Tissue", "Cell Type")
This image contains a relative score for each cluster fo cell types from each the tissue type. The rows are patients.
# make a copy of data
plot.data_copy <- plot.data
plot.data <- plot.data[rownames(rowID_print)[rowID_print$Sample.Type == "Solid Tissue Normal"],
]
plot.data <- plot.data[rownames(rowID_print)[c(which(rowID_print$Project.ID == "Squamous"),
which(rowID_print$Project.ID == "Gastric"), which(rowID_print$Project.ID == "Barrett's"))],
]
breaksList5 = seq(0, 1, by = 0.025)
pheatmap(plot.data, annotation_row = rowID_print[, 1, drop = FALSE], breaks = breaksList5,
color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdYlBu")))(length(breaksList5)),
main = "Relative score", cluster_cols = FALSE, show_rownames = FALSE, clustering_method = "ward.D2",
cluster_rows = FALSE, annotation_colors = anno_col, annotation_col = colID[,
1, drop = FALSE], labels_col = as.character(colID[colnames(plot.data), 2]))
plotdata4 <- as.data.frame(matrix(ncol = 6, nrow = nrow(plot.data)))
colnames(plotdata4) <- c("Sample", "NE", "NG", "BE", "SMG", "ALL")
plotdata4$Sample <- rownames(plot.data)
plotdata4$BE <- rowSums(plot.data[, colnames(plot.data) %in% rownames(colID)[colID$Tissue ==
"BE"]])
plotdata4$NG <- rowSums(plot.data[, colnames(plot.data) %in% rownames(colID)[colID$Tissue ==
"NG"]])
plotdata4$NE <- rowSums(plot.data[, colnames(plot.data) %in% rownames(colID)[colID$Tissue ==
"NE"]])
plotdata4$SMG <- rowSums(plot.data[, colnames(plot.data) %in% rownames(colID)[colID$Tissue ==
"SMG"]])
plotdata4$ALL <- rowSums(plot.data[, colnames(plot.data) %in% rownames(colID)[colID$Tissue ==
"ALL"]])
plotdata4.2 <- as.matrix(plotdata4[, 2:6])
rownames(plotdata4.2) <- plotdata4$Sample
This image containes a relative score for each tissue (scores for clusters from each tissue were add) The rows are patients. THe most similar cell are BE cells.
pheatmap(plotdata4.2, annotation_row = rowID_print[, 1, drop = FALSE], breaks = breaksList5,
color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdYlBu")))(length(breaksList5)),
main = "Relative score per tissue type", cluster_cols = FALSE, show_rownames = FALSE,
clustering_method = "ward.D2", cluster_rows = FALSE, annotation_colors = anno_col)
tmp <- c(names(sort(plotdata4.2[rownames(rowID_print)[which(rowID_print$Project.ID ==
"Squamous")], 1], decreasing = TRUE)), names(sort(plotdata4.2[rownames(rowID_print)[which(rowID_print$Project.ID ==
"Gastric")], 2], decreasing = TRUE)), names(sort(plotdata4.2[rownames(rowID_print)[which(rowID_print$Project.ID ==
"Barrett's")], 3], decreasing = TRUE)))
breaksList5 = seq(0, 1, by = 0.01)
pheatmap(plot.data[tmp, ], annotation_row = rowID_print[, 1, drop = FALSE], breaks = breaksList5,
color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdYlBu")))(length(breaksList5)),
main = "Relative score", cluster_cols = FALSE, show_rownames = FALSE, clustering_method = "ward.D2",
cluster_rows = FALSE, annotation_colors = anno_col, annotation_col = colID[,
1, drop = FALSE], labels_col = as.character(colID[colnames(plot.data), 2]),
gaps_row = c(12, 23), file = paste0(files.out, "/Supplementary_figures/S19/Figure_S19A_all.pdf"))
pheatmap(plotdata4.2[tmp, ], annotation_row = rowID_print[, 1, drop = FALSE], breaks = breaksList5,
color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdYlBu")))(length(breaksList5)),
main = "Relative score per tissue type", cluster_cols = FALSE, show_rownames = FALSE,
clustering_method = "ward.D2", cluster_rows = FALSE, annotation_colors = anno_col,
gaps_row = c(12, 23), file = paste0(files.out, "/Supplementary_figures/S19/Figure_S19B_all.pdf"))
This image containes a relative score for each cluster fo cell types from each the tissue type. The rows are patients. THe most similar cell types are BE undifferentiated clusters.
plot.data <- plot.data_copy
breaksList5 = seq(0, 1, by = 0.01)
plotdata4 <- as.data.frame(matrix(ncol = 6, nrow = nrow(plot.data)))
colnames(plotdata4) <- c("Sample", "NE", "NG", "BE", "SMG", "ALL")
plotdata4$Sample <- rownames(plot.data)
plotdata4$BE <- rowSums(plot.data[, colnames(plot.data) %in% rownames(colID)[colID$Tissue ==
"BE"]])
plotdata4$NG <- rowSums(plot.data[, colnames(plot.data) %in% rownames(colID)[colID$Tissue ==
"NG"]])
plotdata4$NE <- rowSums(plot.data[, colnames(plot.data) %in% rownames(colID)[colID$Tissue ==
"NE"]])
plotdata4$SMG <- rowSums(plot.data[, colnames(plot.data) %in% rownames(colID)[colID$Tissue ==
"SMG"]])
plotdata4$ALL <- rowSums(plot.data[, colnames(plot.data) %in% rownames(colID)[colID$Tissue ==
"ALL"]])
plotdata4.2 <- as.matrix(plotdata4[, 2:6])
rownames(plotdata4.2) <- plotdata4$Sample
tmp <- c(names(sort(plotdata4.2[rownames(rowID_print)[which(rowID_print$Project.ID ==
"ICGC-ESAD" & rowID_print$primary_diagnosis == "Adenocarcinoma")], 3], decreasing = TRUE)))
pheatmap(plot.data[tmp, ], annotation_row = rowID_EAC[, c(1, 3, 4, 6), drop = FALSE],
breaks = breaksList5, color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdYlBu")))(length(breaksList5)),
main = "Relative score", cluster_cols = FALSE, show_rownames = FALSE, clustering_method = "ward.D2",
cluster_rows = TRUE)
This image containes a relative score for each tissue (scores for clusters from each tissue were add) The rows are patients. THe most similar cell are BE cells.
pheatmap(plotdata4.2[tmp, ], annotation_row = rowID_EAC[, c(1, 2, 4, 6), drop = FALSE],
breaks = breaksList5, color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdYlBu")))(length(breaksList5)),
main = "Relative score per tissue type", cluster_cols = FALSE, show_rownames = FALSE,
clustering_method = "ward.D2", cluster_rows = TRUE)
### Plot data with clinical grouping
The following images contain plots of differences between contribution of different cell types to different clinical clasification.
# Prepare data all cell types
ggdata0 <- melt(cbind(plot.data[tmp, ], rowID_EAC[tmp, c(1, 2, 3, 4, 5, 6, 7), drop = FALSE],
rownames(plotdata4.2[tmp, ])), measure.vars = 1:22, variable.name = "Tissue")
# prepare data for combined tissues
ggdata <- melt(cbind(plotdata4.2[tmp, ], rowID_EAC[tmp, c(1, 2, 3, 4, 5, 6, 7), drop = FALSE],
rownames(plotdata4.2[tmp, ])), measure.vars = c("NE", "NG", "BE", "SMG", "ALL"),
variable.name = "Tissue")
p <- ggplot(ggdata0, aes(x = Tissue, y = value, fill = RP.SiewertClassification)) +
geom_boxplot(position = position_dodge(1)) + theme(axis.text.x = element_text(angle = 45,
hjust = 1), plot.title = element_text(hjust = 0.5)) + ggtitle("Siewert Type")
p
p <- ggplot(ggdata, aes(x = Tissue, y = value, fill = RP.SiewertClassification)) +
geom_boxplot(position = position_dodge(1)) + theme(axis.text.x = element_text(angle = 45,
hjust = 1), plot.title = element_text(hjust = 0.5)) + ggtitle("Siewert Type")
p
p <- ggplot(ggdata0, aes(x = Tissue, y = value, fill = RP.BarrettsAdjacent)) + geom_boxplot(position = position_dodge(1)) +
theme(axis.text.x = element_text(angle = 45, hjust = 1), plot.title = element_text(hjust = 0.5)) +
ggtitle("Barrett's Adjacent")
p
p <- ggplot(ggdata, aes(x = Tissue, y = value, fill = RP.BarrettsAdjacent)) + geom_boxplot(position = position_dodge(1)) +
theme(axis.text.x = element_text(angle = 45, hjust = 1), plot.title = element_text(hjust = 0.5)) +
ggtitle("Barrett's Adjacent")
p
p <- ggplot(ggdata0, aes(x = Tissue, y = value, fill = RP.TumourGradingDifferentiationStatus)) +
geom_boxplot(position = position_dodge(1)) + theme(axis.text.x = element_text(angle = 45,
hjust = 1), plot.title = element_text(hjust = 0.5)) + ggtitle("Differentiation Status")
p
p <- ggplot(ggdata, aes(x = Tissue, y = value, fill = RP.TumourGradingDifferentiationStatus)) +
geom_boxplot(position = position_dodge(1)) + theme(axis.text.x = element_text(angle = 45,
hjust = 1), plot.title = element_text(hjust = 0.5)) + ggtitle("Differentiation Status")
p
p <- ggplot(ggdata0, aes(x = Tissue, y = value, fill = RP.TStage.PrimaryTumour)) +
geom_boxplot(position = position_dodge(1)) + theme(axis.text.x = element_text(angle = 45,
hjust = 1), plot.title = element_text(hjust = 0.5)) + ggtitle("T Stage")
p
# get anova statistcs
anova(lm(value ~ Tissue + RP.BarrettsAdjacent + RP.Location + RP.SiewertClassification +
RP.TumourGradingDifferentiationStatus + RP.TStage.PrimaryTumour, data = ggdata0))
Analysis of Variance Table
Response: value Df Sum Sq Mean Sq F value Pr(>F)
Tissue 21 4.8666 0.231743 130.5925 <2e-16 *** RP.BarrettsAdjacent 1 0.0003 0.000348 0.1961 0.6579
RP.Location 2 0.0006 0.000318 0.1790 0.8361
RP.SiewertClassification 2 0.0039 0.001962 1.1056 0.3312
RP.TumourGradingDifferentiationStatus 5 0.0049 0.000979 0.5519 0.7370
RP.TStage.PrimaryTumour 5 0.0105 0.002101 1.1842 0.3144
Residuals 2383 4.2287 0.001775
— Signif. codes: 0 ‘’ 0.001 ’’ 0.01 ’’ 0.05 ‘.’ 0.1 ’ ’ 1
# get anova statistcs
anova(lm(value ~ Tissue + RP.BarrettsAdjacent + RP.Location + RP.SiewertClassification +
RP.TumourGradingDifferentiationStatus + RP.TStage.PrimaryTumour, data = ggdata))
Analysis of Variance Table
Response: value Df Sum Sq Mean Sq F value Pr(>F)
Tissue 4 28.987 7.2467 497.77 <2e-16 *** RP.BarrettsAdjacent 1 0.000 0.0000 0.00 1
RP.Location 2 0.000 0.0000 0.00 1
RP.SiewertClassification 2 0.000 0.0000 0.00 1
RP.TumourGradingDifferentiationStatus 5 0.000 0.0000 0.00 1
RP.TStage.PrimaryTumour 5 0.000 0.0000 0.00 1
Residuals 530 7.716 0.0146
— Signif. codes: 0 ‘’ 0.001 ’’ 0.01 ’’ 0.05 ‘.’ 0.1 ’ ’ 1
This are the figures for the paper. I use all cancer and I sort them according to contribution of specific cell types (BE for Adenos and NE for Squamous)
# remove non epithelial data
plotdata4.3 <- plotdata4.2[, colnames(plotdata4.2) != "ALL"]
# re-normalise
plotdata4.4 <- plotdata4.3/rowSums(plotdata4.3)
tmp <- c(names(sort(plotdata4.4[rownames(rowID_print)[which(rowID_print$Project.ID ==
"TCGA-ESCA" & rowID_print$primary_diagnosis == "Squamous Carcinoma")], 1], decreasing = TRUE)),
names(sort(plotdata4.4[rownames(rowID_print)[which(rowID_print$Project.ID ==
"TCGA-ESCA" & rowID_print$primary_diagnosis == "Adenocarcinoma")], 3], decreasing = TRUE)),
names(sort(plotdata4.4[rownames(rowID_print)[which(rowID_print$Project.ID ==
"ICGC-ESAD" & rowID_print$primary_diagnosis == "Adenocarcinoma")], 3], decreasing = TRUE)))
pheatmap(plotdata4.2[tmp, ], annotation_row = rowID_print[, c(1, 4), drop = FALSE],
breaks = breaksList5, color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdYlBu")))(length(breaksList5)),
main = "Relative score per tissue type", cluster_cols = FALSE, show_rownames = FALSE,
clustering_method = "ward.D2", cluster_rows = FALSE)
pheatmap(plotdata4.4[tmp, ], annotation_row = rowID_print[, c(1, 4), drop = FALSE],
breaks = breaksList5, color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdYlBu")))(length(breaksList5)),
main = "Relative score per tissue type", cluster_cols = FALSE, show_rownames = FALSE,
clustering_method = "ward.D2", cluster_rows = FALSE)
pheatmap(plot.data[tmp, ], annotation_row = rowID_print[, c(1, 4), drop = FALSE],
breaks = breaksList5, color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdYlBu")))(length(breaksList5)),
main = "Relative score per tissue type", cluster_cols = FALSE, show_rownames = FALSE,
clustering_method = "ward.D2", cluster_rows = FALSE, annotation_colors = anno_col)
plot.data4.4 <- plot.data[tmp, 1:23]
plot.data4.4 <- plot.data4.4/rowSums(plot.data4.4)
pheatmap(plotdata4.4[tmp, ], annotation_row = rowID_print[, c(1, 4), drop = FALSE],
breaks = breaksList5, color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdYlBu")))(length(breaksList5)),
main = "Relative score per tissue type", cluster_cols = FALSE, show_rownames = FALSE,
clustering_method = "ward.D2", cluster_rows = FALSE, annotation_colors = anno_col)
pheatmap(plot.data4.4, annotation_row = rowID_print[, c(1, 4), drop = FALSE], breaks = breaksList5,
color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdYlBu")))(length(breaksList5)),
main = "Relative score, figure 5A", cluster_cols = FALSE, show_rownames = FALSE,
clustering_method = "ward.D2", cluster_rows = FALSE, annotation_colors = anno_col,
annotation_col = colID[, 1, drop = FALSE], labels_col = as.character(colID[colnames(plot.data4.4),
2])[1:23], gaps_row = c(81, 161))
pheatmap(plotdata4.4[tmp, ], annotation_row = rowID_print[, c(1, 4), drop = FALSE],
breaks = breaksList5, color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdYlBu")))(length(breaksList5)),
main = "Relative score per tissue type, figure 4B", cluster_cols = FALSE, show_rownames = FALSE,
clustering_method = "ward.D2", cluster_rows = FALSE, annotation_colors = anno_col,
gaps_row = c(81, 161))
pheatmap(plot.data4.4, annotation_row = rowID_print[, c(1, 4), drop = FALSE], breaks = breaksList5,
color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdYlBu")))(length(breaksList5)),
main = "Relative score, figure 5A", cluster_cols = FALSE, show_rownames = FALSE,
clustering_method = "ward.D2", cluster_rows = FALSE, annotation_colors = anno_col,
annotation_col = colID[, 1, drop = FALSE], labels_col = as.character(colID[colnames(plot.data4.4),
2])[1:23], gaps_row = c(81, 161), file = paste0(files.out, "/Figure_5//Figure_5A.pdf"),
width = 15)
pheatmap(plotdata4.4[tmp, ], annotation_row = rowID_print[, c(1, 4), drop = FALSE],
breaks = breaksList5, color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdYlBu")))(length(breaksList5)),
main = "Relative score per tissue type, figure 5B", cluster_cols = FALSE, show_rownames = FALSE,
clustering_method = "ward.D2", cluster_rows = FALSE, annotation_colors = anno_col,
gaps_row = c(81, 161), file = paste0(files.out, "/Figure_5//Figure_5B.pdf"))
5 ## End Matters
To finish get session info:
R version 3.6.2 (2019-12-12) Platform: x86_64-redhat-linux-gnu (64-bit) Running under: Fedora 31 (Workstation Edition)
Matrix products: default BLAS/LAPACK: /usr/lib64/R/lib/libRblas.so
locale: [1] LC_CTYPE=en_GB.UTF-8 LC_NUMERIC=C
[3] LC_TIME=en_GB.UTF-8 LC_COLLATE=en_GB.UTF-8
[5] LC_MONETARY=en_GB.UTF-8 LC_MESSAGES=en_GB.UTF-8
[7] LC_PAPER=en_GB.UTF-8 LC_NAME=C
[9] LC_ADDRESS=C LC_TELEPHONE=C
[11] LC_MEASUREMENT=en_GB.UTF-8 LC_IDENTIFICATION=C
attached base packages: [1] parallel stats4 stats graphics grDevices utils datasets [8] methods base
other attached packages: [1] destiny_3.0.1 edgeR_3.28.0
[3] limma_3.42.2 dbscan_1.1-5
[5] princurve_2.1.4 dynamicTreeCut_1.63-1
[7] scater_1.14.6 Rtsne_0.15
[9] reshape2_1.4.3 RColorBrewer_1.1-2
[11] xbioc_0.1.18 AnnotationDbi_1.48.0
[13] Matrix_1.2-18 pheatmap_1.0.12
[15] MuSiC_0.1.1 ggplot2_3.2.1
[17] nnls_1.4 scran_1.14.6
[19] SingleCellExperiment_1.8.0 SummarizedExperiment_1.16.1 [21] DelayedArray_0.12.2 BiocParallel_1.20.1
[23] matrixStats_0.55.0 Biobase_2.46.0
[25] GenomicRanges_1.38.0 GenomeInfoDb_1.22.0
[27] IRanges_2.20.2 S4Vectors_0.24.3
[29] BiocGenerics_0.32.0 rmarkdown_2.1
loaded via a namespace (and not attached): [1] readxl_1.3.1 RcppEigen_0.3.3.7.0 plyr_1.8.5
[4] igraph_1.2.4.2 lazyeval_0.2.2 sp_1.3-2
[7] RcppHNSW_0.2.0 digest_0.6.24 htmltools_0.4.0
[10] viridis_0.5.1 magrittr_1.5 memoise_1.1.0
[13] openxlsx_4.1.4 xts_0.12-0 MCMCpack_1.4-6
[16] colorspace_1.4-1 blob_1.2.1 haven_2.2.0
[19] xfun_0.12 dplyr_0.8.4 hexbin_1.28.1
[22] crayon_1.3.4 RCurl_1.98-1.1 zoo_1.8-7
[25] glue_1.3.1 registry_0.5-1 gtable_0.3.0
[28] zlibbioc_1.32.0 XVector_0.26.0 MatrixModels_0.4-1
[31] car_3.0-6 BiocSingular_1.2.1 DEoptimR_1.0-8
[34] abind_1.4-5 SparseM_1.78 VIM_5.1.0
[37] scales_1.1.0 ggplot.multistats_1.0.0 DBI_1.1.0
[40] ggthemes_4.2.0 bibtex_0.4.2.2 Rcpp_1.0.3
[43] viridisLite_0.3.0 xtable_1.8-4 laeken_0.5.1
[46] dqrng_0.2.1 proxy_0.4-23 foreign_0.8-72
[49] bit_1.1-15.2 rsvd_1.0.2 vcd_1.4-5
[52] farver_2.0.3 pkgconfig_2.0.3 nnet_7.3-12
[55] locfit_1.5-9.1 labeling_0.3 tidyselect_1.0.0
[58] rlang_0.4.4 munsell_0.5.0 cellranger_1.1.0
[61] tools_3.6.2 RSQLite_2.2.0 ranger_0.12.1
[64] batchelor_1.2.4 evaluate_0.14 stringr_1.4.0
[67] yaml_2.2.1 mcmc_0.9-6.1 knitr_1.28
[70] bit64_0.9-7 zip_2.0.4 robustbase_0.93-5
[73] purrr_0.3.3 quantreg_5.54 formatR_1.7
[76] compiler_3.6.2 beeswarm_0.2.3 curl_4.3
[79] e1071_1.7-3 smoother_1.1 tibble_2.1.3
[82] statmod_1.4.33 stringi_1.4.5 RSpectra_0.16-0
[85] forcats_0.4.0 lattice_0.20-38 vctrs_0.2.2
[88] pillar_1.4.3 lifecycle_0.1.0 BiocManager_1.30.10
[91] lmtest_0.9-37 BiocNeighbors_1.4.1 data.table_1.12.8
[94] bitops_1.0-6 irlba_2.3.3 pcaMethods_1.78.0
[97] R6_2.4.1 gridExtra_2.3 rio_0.5.16
[100] vipor_0.4.5 codetools_0.2-16 boot_1.3-23
[103] MASS_7.3-51.4 assertthat_0.2.1 pkgmaker_0.31
[106] withr_2.1.2 GenomeInfoDbData_1.2.2 hms_0.5.3
[109] grid_3.6.2 tidyr_1.0.2 coda_0.19-3
[112] class_7.3-15 DelayedMatrixStats_1.8.0 carData_3.0-3
[115] TTR_0.23-6 scatterplot3d_0.3-41 ggbeeswarm_0.6.0